BYU is off the charts

By: Laura Stickells

April 28, 2021

Step 1: Scrape the draft data from Pro Football Reference
urlprefix <- 'https://www.pro-football-reference.com/years/'
urlend <- '/draft.htm'
startyr <- 2000
endyr <- 2020

draftedPlayers <- data.frame()
for (i in startyr:endyr) {
  URL <- paste(urlprefix, as.character(i), urlend, sep = "")
  table <- URL %>%
    read_html() %>%
    html_node('table') %>%
    html_table()              
  table$Year <- i
  draftedPlayers <- rbind(table, draftedPlayers)
}

cols = names(draftedPlayers)
cols[1] = 'Rnd'
cols[2] = 'Pick'
cols[3] = 'Team'
cols[4] = 'Player'
cols[5] = 'Pos'
cols[6] = 'Age'
cols[7] = 'Last Year'
cols[8] = '1st Team All Pro'
cols[9] = 'Pro Bowl'
cols[10] = 'Years as Starter'
cols[11] = 'Career AV'
cols[12] = 'Draft Team AV'
cols[13] = 'Games Played'
cols[14] = 'Passes Completed'
cols[15] = 'Pass Attempts'
cols[16] = 'Passing Yds'
cols[17] = 'Passing TDs'
cols[18] = 'Interceptions Thrown'
cols[19] = 'Rushing Attepts'
cols[20] = 'Rushing Yds'
cols[21] = 'Rushing TDs'
cols[22] = 'Receptions'
cols[23] = 'Recieving Yds'
cols[24] = 'Recieving TDs'
cols[25] = 'Solo Tackles'
cols[26] = 'Interceptions (Def)'
cols[27] = 'Sacks (Def)'
cols[28] = 'College/Univ'
cols[29] = 'other'
cols[30] = 'Draft Year'
names(draftedPlayers) = cols

draftedPlayers <- draftedPlayers[draftedPlayers$Player != "Player",] #remove title rows
draftedPlayers <- select(draftedPlayers, -c('other')) #remove other

draftedPlayers$Team[draftedPlayers$Team == 'GNB'] <- 'GB'
draftedPlayers$Team[draftedPlayers$Team == 'KAN'] <- 'KC'
draftedPlayers$Team[draftedPlayers$Team %in% c( 'LVR', 'OAK')] <- 'LV'
draftedPlayers$Team[draftedPlayers$Team == 'NWE'] <- 'NE'
draftedPlayers$Team[draftedPlayers$Team == 'NOR'] <- 'NO'
draftedPlayers$Team[draftedPlayers$Team == 'SFO'] <- 'SF'
draftedPlayers$Team[draftedPlayers$Team == 'TAM'] <- 'TB'
draftedPlayers$Team[draftedPlayers$Team == 'SDG'] <- 'LAC'
draftedPlayers$Team[draftedPlayers$Team == 'STL'] <- 'LAR'

draftedPlayers <- draftedPlayers %>%
  mutate_at(vars(-c(Team, Player, Pos, `College/Univ`)), funs(as.numeric(.)))
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
draftedPlayers$`College/Univ` <- gsub("St.", "State", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("Col.", "College", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub(" (FL)", "", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("North Carolina State", "NC State", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("Mississippi", "Ole Miss", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("North Carolina State", "NC State", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("Ole Miss State", "Mississippi State", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("Hawaii", "Hawai'i", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("Central Florida", "UCF", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("Southern Miss", "Southern Mississippi", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("Cal Poly-San Luis Obispo", "Cal Poly", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("California-Davis", "UC Davis", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("Massachusetts", "UMass", draftedPlayers$`College/Univ`, fixed=TRUE)
Step 2: Load the school data with the colors and logos
teamInfo <- cfbd_team_info(conference = NULL, only_fbs = TRUE, year = NULL)
logos <- data.frame(matrix(unlist(teamInfo$logos), nrow=length(teamInfo$logos), byrow=TRUE),stringsAsFactors=FALSE)
teamInfo <- cbind(teamInfo, logos) %>%
  rename(
    logo1 = X1,
    logo2 = X2
    )
teamInfo <- teamInfo[ -c(12) ]
Step 3: Graph BYU’s number of draft picks per year.

Seasons hack ensures that there is a 0 data point for years when no players are drafted.

BYUDraftTimeline <- draftedPlayers %>%
  filter(`College/Univ` %in% c("BYU")) %>%
  group_by(`Draft Year`) %>%
  summarise(
    numPicks = n()
  )

seasonsHack <- draftedPlayers %>%
  group_by(`Draft Year`) %>%
  summarise(
    blank = 0
  )
  
BYUDraftTimeline <- merge(x = seasonsHack, y = BYUDraftTimeline, by = "Draft Year", all = TRUE)
BYUDraftTimeline[is.na(BYUDraftTimeline)] <- 0
BYUDraftTimeline <- BYUDraftTimeline %>%
  filter(`Draft Year` >= 2005)

rm(seasonsHack)

BYUDraftTimeline[nrow(BYUDraftTimeline) + 1,] <- c(2021, 0, 5)

graph <- BYUDraftTimeline %>%
  ggplot(aes(x = `Draft Year`, y = numPicks, color = "#001E4C", linetype = factor(ifelse(`Draft Year`==2021,"Solid", "dotted")))) +
  geom_line(size = 1) +
  geom_segment(x = 2020, y = 0, xend = 2021, yend = 5, color="#001E4C", linetype="dotted", size = 1) +
  geom_point(alpha=.7) +
  scale_colour_identity() +
  xlim(2005, 2021) +
  ylim(0, 5) +
  theme_fivethirtyeight() +
  theme(
    legend.position = "none",
    legend.title = element_blank(),
    strip.text = element_text(face = "bold"),
    axis.title.y = element_text(),
    axis.title.x = element_text(),
    axis.text.x = element_text(angle = 45, vjust = .7, face = "bold"),
    panel.grid.major.x = element_blank()
    ) +
  labs(
    y = "Draft Picks",
    x = "Season",
    title = "BYU draft picks by season",
    subtitle = "The Cougars haven't had more than one player selected in the draft since 2009",
    caption = "Data: @pfrer | Plot: @LauraStickells"
  ) +
  scale_x_continuous(breaks = seq(min(BYUDraftTimeline$`Draft Year`), max(BYUDraftTimeline$`Draft Year`), by = 1))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
graph

Step 3: Create a correlation table with p values for School and Career Average Value.
all_schools <- draftedPlayers %>%
    filter(!is.na(`College/Univ`), !is.na(`Career AV`), !is.na(Pick), `Draft Year` > 2000) %>%
    group_by(`College/Univ`) %>%
    summarise(
        n = n()
    ) %>%
    filter(n >= 13) %>%
    select(`College/Univ`)
all_schools <- all_schools[['College/Univ']]

cor_p_table <- data.frame(matrix(ncol=3, nrow=0, dimnames=list(NULL, c("School", "Cor", "pvalue"))))

for (i in all_schools) {
  temp_table <- data.frame()
  temp_table <- draftedPlayers %>%
    filter(`College/Univ` == i, `Draft Year` > 2010)
  res <- cor.test(temp_table$Pick, temp_table$`Career AV`)
  new_row <- c(i, round(res$estimate, 2), round(res$p.value, 3))
  cor_p_table[nrow(cor_p_table) + 1, ] <- new_row 
}

cor_p_table <- transform(cor_p_table, Cor = as.numeric(Cor))
cor_p_table <- transform(cor_p_table, pvalue = as.numeric(pvalue))
Step 4: Create a table with the average career value for a schools draft picks with the number of players that have been drafted. Must have more than five picks.
schoolAverageValue <- draftedPlayers %>%
    filter(!is.na(`College/Univ`), !is.na(`Career AV`), !is.na(Pick), `Draft Year` >= 2010) %>%
    group_by(`College/Univ`) %>%
    summarise(
        n = n(),
        avg = mean(`Career AV`)
    ) %>%
  filter(n >= 5)
Step 5: Select all BYU players selected since 2010
draftedPlayers %>%
    filter(`College/Univ` == "BYU", `Draft Year` >= 2010) %>%
  select(Player)
## # A tibble: 7 x 1
##   Player         
##   <chr>          
## 1 Sione Takitaki 
## 2 Fred Warner    
## 3 Jamaal Williams
## 4 Bronson Kaufusi
## 5 Kyle Van Noy   
## 6 Ezekiel Ansah  
## 7 Dennis Pitta
Step 6: Create a table showing how BYU has fared in the NFL compared to the players from the same position that were selected before them.
BYUplayerInfo <- draftedPlayers %>%
  filter(`College/Univ` == "BYU", `Draft Year` >= 2010) 

player <- 'Fred Warner'
position <- BYUplayerInfo$Pos[BYUplayerInfo$Player== player]
year <- BYUplayerInfo$`Draft Year`[BYUplayerInfo$Player== player]
pick <- BYUplayerInfo$Pick[BYUplayerInfo$Player== player]


pickedBeforeData <- draftedPlayers %>%
  filter(`Pos` == position, `Draft Year` == year, Pick <= pick)
pickedBeforeData[is.na(pickedBeforeData)] <- 0

meanPickedBeforeData <- mean(pickedBeforeData$`Career AV`)

pickedBeforeGraph <- pickedBeforeData %>%
  ggplot(aes(x = reorder(Player, Pick) , y = `Career AV`, fill = factor(ifelse(`College/Univ`=="BYU","Blue","Normal")))) +
  geom_col(alpha = 0.7) +
  scale_fill_manual(name = "College/Univ", values=c("#001E4C","grey50")) +
  geom_hline(yintercept = meanPickedBeforeData, color = "black", linetype = "dashed") +
  geom_label(aes(label = paste('Rnd', Rnd, '|', 'Pk', Pick, '\n', 'Career AV:', `Career AV`)), fill = "white", vjust = -0.1, size = 3, fontface = "bold") +
  ylim(0, max(pickedBeforeData$`Career AV`, na.rm = TRUE)+10) +
  ggthemes::theme_fivethirtyeight() +
  theme(
    legend.position = "none",
    strip.text = element_text(face = "bold"),
    axis.title.y = element_text(),
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = .7, face = "bold"),
    panel.grid.major.x = element_blank()
    ) +
  labs(
    y = "Career AV",
    title = paste(player, " Career AV compared to ", position, "s picked before him in ", year, sep = ""),
    subtitle = "",
    caption = "Data: @ProFootballReference | Plot: @LauraStickells"
  ) 

pickedBeforeGraph